#!/usr/bin/perl -w
use strict;

# Filter a GPI hits table down to the genes named in a CSV file.
#
# Usage: script HITS_FILE CSV_FILE
#
#   HITS_FILE  whitespace-delimited table; the first line is a header that is
#              always echoed, and the first column of every other line is the
#              gene name.
#   CSV_FILE   comma-separated gene names; every non-empty cell on every line
#              is kept (one combined list, not a per-species list).
#
# Matching rows are printed unchanged to STDOUT. Gene names are compared
# case-insensitively: both the CSV names and the hit names are upper-cased
# before comparison.

die "usage: $0 hits_file csv_file\n" unless @ARGV >= 2;

# GPI hits file, then the CSV file which lists genes we used
my ($hits_file, $csv_file) = @ARGV;

# Upper-cased gene name => 1 for every gene we want to keep.
my %keep_genes;

# Three-argument open so a filename can never be interpreted as a mode/pipe.
open(my $csv_fh, '<', $csv_file) or die "cannot open $csv_file: $!";
while (my $line = <$csv_fh>) {
    chomp $line;
    for my $name (split /,/, $line) {
        # Strip padding and any stray CR left over from CRLF line endings;
        # otherwise the last name on each line would never match.
        $name =~ s/^\s+|\s+$//g;
        next if length($name) == 0;
        $keep_genes{uc $name} = 1;
    }
}
close($csv_fh);

open(my $hits_fh, '<', $hits_file) or die "cannot open $hits_file: $!\n";

# Pass the header line through untouched (an empty file has none).
my $header = <$hits_fh>;
print $header if defined $header;

while (my $line = <$hits_fh>) {
    # Only the first column (gene name) is needed for filtering; the
    # remaining columns are passed through as-is when the row is kept.
    my ($name) = split ' ', $line;
    next unless defined $name;    # blank line

    # Keys were stored upper-cased, so the lookup must be upper-cased too.
    print $line if $keep_genes{uc $name};
}
close($hits_fh);
